{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "# 如果当前代码文件运行测试需要加入修改路径，避免出现后导包问题\n",
    "BASE_DIR = os.path.dirname(os.path.dirname(os.getcwd()))\n",
    "sys.path.insert(0, os.path.join(BASE_DIR))\n",
    "\n",
    "PYSPARK_PYTHON = \"/miniconda2/envs/reco_sys/bin/python\"\n",
    "# 当存在多个版本时，不指定很可能会导致出错\n",
    "os.environ[\"PYSPARK_PYTHON\"] = PYSPARK_PYTHON\n",
    "os.environ[\"PYSPARK_DRIVER_PYTHON\"] = PYSPARK_PYTHON\n",
    "\n",
    "from offline import SparkSessionBase\n",
    "import pyhdfs\n",
    "import time\n",
    "\n",
    "\n",
    "class UpdateUserProfile(SparkSessionBase):\n",
    "    \"\"\"离线相关处理程序\n",
    "    \"\"\"\n",
    "    SPARK_APP_NAME = \"updateUser\"\n",
    "    ENABLE_HIVE_SUPPORT = True\n",
    "\n",
    "    SPARK_EXECUTOR_MEMORY = \"7g\"\n",
    "\n",
    "    def __init__(self):\n",
    "\n",
    "        self.spark = self._create_spark_session()\n",
    "\n",
    "uup = UpdateUserProfile()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+--------+---------+--------------------+----------------+--------+-------------------+\n",
      "|         actionTime|readTime|channelId|           articleId|algorithmCombine|  action|             userId|\n",
      "+-------------------+--------+---------+--------------------+----------------+--------+-------------------+\n",
      "|2019-04-09 07:13:25|        |        0|[15716, 19171, 13...|              C2|exposure|1103195673450250240|\n",
      "|2019-04-09 07:13:49|        |       18|               19171|              C2|   click|1103195673450250240|\n",
      "|2019-04-09 07:14:02|   12238|       18|               19171|              C2|    read|1103195673450250240|\n",
      "|2019-04-09 07:14:04|        |       18|               13797|              C2|   click|1103195673450250240|\n",
      "|2019-04-09 07:14:27|   21820|       18|               13797|              C2|    read|1103195673450250240|\n",
      "|2019-04-09 07:13:25|        |        0|[15716, 19171, 13...|              C2|exposure|1103195673450250240|\n",
      "|2019-04-09 07:13:49|        |       18|               19171|              C2|   click|1103195673450250240|\n",
      "|2019-04-09 07:14:02|   12238|       18|               19171|              C2|    read|1103195673450250240|\n",
      "|2019-04-09 07:14:04|        |       18|               13797|              C2|   click|1103195673450250240|\n",
      "|2019-04-09 07:14:27|   21820|       18|               13797|              C2|    read|1103195673450250240|\n",
      "|2019-04-09 07:13:25|        |        0|[15716, 19171, 13...|              C2|exposure|1103195673450250240|\n",
      "|2019-04-09 07:13:49|        |       18|               19171|              C2|   click|1103195673450250240|\n",
      "|2019-04-09 07:14:02|   12238|       18|               19171|              C2|    read|1103195673450250240|\n",
      "|2019-04-09 07:14:04|        |       18|               13797|              C2|   click|1103195673450250240|\n",
      "|2019-04-09 07:14:27|   21820|       18|               13797|              C2|    read|1103195673450250240|\n",
      "|2019-04-09 07:13:25|        |        0|[15716, 19171, 13...|              C2|exposure|1103195673450250240|\n",
      "|2019-04-09 07:13:49|        |       18|               19171|              C2|   click|1103195673450250240|\n",
      "|2019-04-09 07:14:02|   12238|       18|               19171|              C2|    read|1103195673450250240|\n",
      "|2019-04-09 07:14:04|        |       18|               13797|              C2|   click|1103195673450250240|\n",
      "|2019-04-09 07:14:27|   21820|       18|               13797|              C2|    read|1103195673450250240|\n",
      "+-------------------+--------+---------+--------------------+----------------+--------+-------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# - 2、读取固定时间内的用户行为日志\n",
    "uup.spark.sql(\"use profile\")\n",
    "user_action = uup.spark.sql(\"select actionTime, readTime, channelId, param.articleId, param.algorithmCombine, param.action, param.userId from user_action where dt>='2019-04-01'\")\n",
    "user_action.show()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# - 3、进行用户日志数据处理\n",
    "def _compute(row):\n",
    "    \n",
    "    _list = []\n",
    "    if row.action == 'exposure':\n",
    "        for article_id in eval(row.articleId):\n",
    "            # 用户ID跟文章ID拼接一个样本\n",
    "            # [\"user_id\", \"action_time\",\"article_id\", \"channel_id\", \"shared\", \"clicked\", \"collected\", \"exposure\", \"read_time\"]\n",
    "            _list.append([row.userId, row.actionTime, article_id, row.channelId, False, False, False, True, row.readTime])\n",
    "        return _list\n",
    "    else:\n",
    "        class Temp(object):\n",
    "            shared = False\n",
    "            clicked = False\n",
    "            collected = False\n",
    "            read_time = \"\"\n",
    "        \n",
    "        _tp = Temp()\n",
    "        if row.action == 'click':\n",
    "            _tp.clicked = True\n",
    "        elif row.action == 'share':\n",
    "            _tp.shared = True\n",
    "        elif row.action == 'collect':\n",
    "            _tp.collected = True\n",
    "        elif row.action == 'read':\n",
    "            _tp.clicked = True\n",
    "        else:\n",
    "            pass\n",
    "        \n",
    "        _list.append([row.userId, row.actionTime, int(row.articleId), row.channelId, _tp.shared, _tp.clicked, _tp.collected, True, row.readTime])\n",
    "        \n",
    "        return _list\n",
    "        \n",
    "\n",
    "_res = user_action.rdd.flatMap(_compute)\n",
    "\n",
    "user_action_basic = _res.toDF([\"user_id\", \"action_time\",\"article_id\", \"channel_id\", \"shared\", \"clicked\", \"collected\", \"exposure\", \"read_time\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+-------------------+----------+----------+------+-------+---------+--------+---------+\n",
      "|            user_id|        action_time|article_id|channel_id|shared|clicked|collected|exposure|read_time|\n",
      "+-------------------+-------------------+----------+----------+------+-------+---------+--------+---------+\n",
      "|1103195673450250240|2019-04-09 07:13:25|     15716|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     19171|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     13797|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     17511|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     18795|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     18156|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     43885|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     13167|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     13039|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     18038|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:49|     19171|        18| false|   true|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:14:02|     19171|        18| false|   true|    false|    true|    12238|\n",
      "|1103195673450250240|2019-04-09 07:14:04|     13797|        18| false|   true|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:14:27|     13797|        18| false|   true|    false|    true|    21820|\n",
      "|1103195673450250240|2019-04-09 07:13:25|     15716|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     19171|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     13797|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     17511|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     18795|         0| false|  false|    false|    true|         |\n",
      "|1103195673450250240|2019-04-09 07:13:25|     18156|         0| false|  false|    false|    true|         |\n",
      "+-------------------+-------------------+----------+----------+------+-------+---------+--------+---------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "user_action_basic.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# - 4、存储到user_article_basic表中\n",
    "old = uup.spark.sql(\"select * from user_article_basic\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "new = old.unionAll(user_action_basic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# new.registerTempTable('temptable')\n",
    "\n",
    "# uup.spark.sql(\"insert overwrite table user_article_basic select user_id, max(action_time) as action_time, \"\n",
    "#         \"article_id, max(channel_id) as channel_id, max(shared) as shared, max(clicked) as clicked, \"\n",
    "#         \"max(collected) as collected, max(exposure) as exposure, max(read_time) as read_time from temptable \"\n",
    "#         \"group by user_id, article_id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 用户画像的关键词获取以及权重计算\n",
    "# 1、读取user_article_basic表，合并行为表与文章画像中的主题词\n",
    "uup.spark.sql(\"use profile\")\n",
    "user_basic = uup.spark.sql(\"select * from user_article_basic\").drop('channel_id')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+-------------------+-------------------+------+-------+---------+--------+---------+\n",
      "|            user_id|        action_time|         article_id|shared|clicked|collected|exposure|read_time|\n",
      "+-------------------+-------------------+-------------------+------+-------+---------+--------+---------+\n",
      "|1105045287866466304|2019-03-11 18:13:45|              14225| false|  false|    false|    true|         |\n",
      "|1106476833370537984|2019-03-15 16:46:50|              14208| false|  false|    false|    true|         |\n",
      "|1109980466942836736|2019-03-25 08:50:36|              19233| false|  false|    false|    true|         |\n",
      "|1109980466942836736|2019-03-25 16:40:37|              44737| false|  false|    false|    true|         |\n",
      "|1109993249109442560|2019-03-25 09:39:48|              17283| false|  false|    false|    true|         |\n",
      "|1111189494544990208|2019-03-28 17:02:35|              19322| false|  false|    false|    true|         |\n",
      "|1111524501104885760|2019-03-29 15:04:27|              44161| false|  false|    false|    true|         |\n",
      "|1112727762809913344|2019-04-03 12:51:57|              18172| false|   true|     true|    true|    19413|\n",
      "|1113020831425888256|2019-04-02 18:10:20|1112592065390182400| false|  false|    false|    true|         |\n",
      "|1114863735962337280|2019-04-07 20:13:43|              17665| false|  false|    false|    true|         |\n",
      "|1114863741448486912|2019-04-07 20:13:39|              14208| false|  false|    false|    true|         |\n",
      "|1114863751909081088|2019-04-09 16:40:43|              13751| false|  false|    false|    true|         |\n",
      "|1114863846486441984|2019-04-09 16:38:59|              17940| false|  false|    false|    true|         |\n",
      "|1114863941936218112|2019-04-07 20:20:49|              15196| false|  false|    false|    true|         |\n",
      "|1114863998437687296|2019-04-09 16:47:19|              19233| false|  false|    false|    true|         |\n",
      "|1114864164158832640|2019-04-09 16:42:35|             141431| false|  false|    false|    true|         |\n",
      "|1114864237131333632|2019-04-09 16:34:48|              13797| false|  false|    false|    true|         |\n",
      "|1114864354622177280|2019-04-09 16:39:15|             134812| false|  false|    false|    true|         |\n",
      "|1115089292662669312|2019-04-08 11:09:40|1112608068731928576| false|  false|    false|    true|         |\n",
      "|1115534909935452160|2019-04-09 16:47:19|              18156| false|  false|    false|    true|         |\n",
      "+-------------------+-------------------+-------------------+------+-------+---------+--------+---------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "user_basic.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "#读取文章画像\n",
    "uup.spark.sql(\"use article\")\n",
    "article_topic = uup.spark.sql(\"select  article_id, channel_id, topics from article_profile\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------+----------+--------------------+\n",
      "|article_id|channel_id|              topics|\n",
      "+----------+----------+--------------------+\n",
      "|        26|        17|[Electron, 全自动, 产...|\n",
      "|        29|        17|[WebAssembly, 影音,...|\n",
      "|       474|        17|[textAlign, borde...|\n",
      "|       964|        11|[protocol, RMI, d...|\n",
      "|      1677|        17|[spritesmith, ico...|\n",
      "|      1697|         6|[nav, 样式, width, ...|\n",
      "|      1806|        17|[声明, word, 容器, Ex...|\n",
      "|      1950|        17|[app, scss, koala...|\n",
      "|      2040|        17|[宽度, 媒体, width, r...|\n",
      "|      2214|        11|[Cyber, 语言, 黑客, 知...|\n",
      "|      2250|         6|[宽度, cal, 阶梯, 页面,...|\n",
      "|      2453|        13|[__, CNN, logisti...|\n",
      "|      2509|        13|[池化, CNN, 卷积神经网络,...|\n",
      "|      2529|        17|[标题栏, 定义, 嵌套, hea...|\n",
      "|      2927|         6|[季风, 圆角, bezier, ...|\n",
      "|      3091|         6|[Chrome, react, 工...|\n",
      "|      3506|        17|[cond, AJAX, 实心, ...|\n",
      "|      3764|        15|[__, 语言, 原型链, obj...|\n",
      "|      4590|        19|[println, Class, ...|\n",
      "|      4823|        19|[引用传递, Human, TOD...|\n",
      "+----------+----------+--------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "article_topic.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "user_topic = user_basic.join(article_topic, on=['article_id'], how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------+-------------------+-------------------+------+-------+---------+--------+---------+----------+--------------------+\n",
      "|article_id|            user_id|        action_time|shared|clicked|collected|exposure|read_time|channel_id|              topics|\n",
      "+----------+-------------------+-------------------+------+-------+---------+--------+---------+----------+--------------------+\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|[补码, 字符串, 李白, typ...|\n",
      "|     13401|1114864237131333632|2019-04-09 16:39:51| false|  false|    false|    true|         |        18|[补码, 字符串, 李白, typ...|\n",
      "|     13401|1106396183141548032|2019-03-28 10:58:20| false|  false|    false|    true|         |        18|[补码, 字符串, 李白, typ...|\n",
      "|     13401|1109994594201763840|2019-03-26 15:03:58| false|  false|    false|    true|         |        18|[补码, 字符串, 李白, typ...|\n",
      "|     14805|1105045287866466304|2019-03-11 18:15:48| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|1114865875103514624|2019-04-09 16:44:09| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|                  1|2019-03-05 17:34:03| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|1113004557979353088|2019-04-04 08:31:44| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|1115534909935452160|2019-04-09 16:46:37| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|1115089292662669312|2019-04-09 16:39:49| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|1114864434305564672|2019-04-09 16:42:37| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|1112715153402494976|2019-04-01 21:56:48| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|1114863998437687296|2019-04-09 16:41:09| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|1111524501104885760|2019-03-29 15:05:28| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|1114864237131333632|2019-04-09 16:34:48| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|1111189494544990208|2019-03-28 16:57:45| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|1106476833370537984|2019-03-15 16:48:08| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|1114864474352779264|2019-04-07 20:19:42| false|   true|    false|    true|    32360|        18|[占位符, Code, sep, ...|\n",
      "|     14805|1109995683777085440|2019-03-25 09:53:07| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "|     14805|1113053603926376448|2019-04-02 20:23:35| false|  false|    false|    true|         |        18|[占位符, Code, sep, ...|\n",
      "+----------+-------------------+-------------------+------+-------+---------+--------+---------+----------+--------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "user_topic.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pyspark.sql.functions as F\n",
    "\n",
    "user_topic = user_topic.withColumn('topic', F.explode('topics')).drop('topics')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------+-------------------+-------------------+------+-------+---------+--------+---------+----------+--------+\n",
      "|article_id|            user_id|        action_time|shared|clicked|collected|exposure|read_time|channel_id|   topic|\n",
      "+----------+-------------------+-------------------+------+-------+---------+--------+---------+----------+--------+\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|      补码|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|     字符串|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|      李白|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|    type|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|      元素|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|    删除元素|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|      负数|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|      基数|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|     tp2|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|    数据类型|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|     二进制|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|xiaoming|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|      大写|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|      示例|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|      字典|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|     八进制|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|      元组|\n",
      "|     13401|                 10|2019-03-06 10:06:12| false|  false|    false|    true|         |        18|   print|\n",
      "|     13401|1114864237131333632|2019-04-09 16:39:51| false|  false|    false|    true|         |        18|      补码|\n",
      "|     13401|1114864237131333632|2019-04-09 16:39:51| false|  false|    false|    true|         |        18|     字符串|\n",
      "+----------+-------------------+-------------------+------+-------+---------+--------+---------+----------+--------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "user_topic.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_user_label_weights(partitions):\n",
    "    \"\"\"# 计算用户关键词权重\n",
    "    \"\"\"\n",
    "    weightsOfaction = {\n",
    "        \"read_min\": 1,\n",
    "        \"read_middle\": 2,\n",
    "        \"collect\": 2,\n",
    "        \"share\": 3,\n",
    "        \"click\": 5\n",
    "    }\n",
    "    \n",
    "    # 导入包\n",
    "    from datetime import datetime\n",
    "    import numpy as np\n",
    "    \n",
    "    \n",
    "    # 循环每个用户对应每个关键词处理\n",
    "    for row in partitions:\n",
    "        \n",
    "        # 计算时间系数\n",
    "        t = datetime.now() - datetime.strptime(row.action_time, '%Y-%m-%d %H:%M:%S')\n",
    "        alpha = 1 / (np.log(t.days + 1) + 1)\n",
    "        \n",
    "        # 判断一下这个关键词对应的操作文章时间大小的权重处理\n",
    "        if row.read_time  == '':\n",
    "            read_t = 0\n",
    "        else:\n",
    "            read_t = int(row.read_time)\n",
    "        \n",
    "        # 阅读时间的行为分数计算出来\n",
    "        read_score = weightsOfaction['read_middle'] if read_t > 1000 else weightsOfaction['read_min']\n",
    "        \n",
    "        # 计算row.topic的权重\n",
    "        weights = alpha * (row.shared * weightsOfaction['share'] + row.clicked * weightsOfaction['click'] +\n",
    "                          row.collected * weightsOfaction['collect'] + read_score)\n",
    "        \n",
    "        # user_profilehbase表\n",
    "        #        with pool.connection() as conn:\n",
    "#            table = conn.table('user_profile')\n",
    "#            table.put('user:{}'.format(row.user_id).encode(),\n",
    "#                      {'partial:{}:{}'.format(row.channel_id, row.topic).encode(): json.dumps(\n",
    "#                          weigths).encode()})\n",
    "#            conn.close()\n",
    "        \n",
    "\n",
    "\n",
    "user_topic.foreachPartition(compute_user_label_weights)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
